library(vegan)
library(MASS)
envdata<-read.csv('env.csv',header=T,row.names=1)
sitegroup<-read.csv('Site_Groups.csv',header=T,row.names=1)
#log 10 transform envdata
envdata.tran<-data.trans(envdata,method='log',plot=F)
#LDA
env.lda<-lda(envdata.tran,sitegroup[,-2])
#look at it
env.lda
#compute the canonical scores (the scores for each obs on each canonical axis)
env.lda.pred<-predict(env.lda)
#look at it
env.lda.pred
#save the scores as a different object
scores<-env.lda.pred$x
scores
#graphically examine separation to get a sense of how well LDA worked
#bind the grouping variable to the data frame containing scores
result<-as.data.frame(cbind(sitegroup[,-2],scores))
#check it
result
#note: here it gives site group as numbers 1-4 not MidAt, Ohio etc.
#plot it
box.plots(result,var='LD1',by='V1',notch=TRUE,varwidth=TRUE)
hist.plots(result,var='LD1',by='V1')
#classification success?
#'confusion matrix' see how many were classified correctly
env.table<-table(sitegroup[,-2],env.lda.pred$class)
#look at it
env.table
#compute the overall correct classification rate (how many correct/correct + incorrect)
sum(diag(env.table))/sum(env.table)
#view the raw canonical coefficients
env.lda$scaling
#structure.. 'easier' to interpret than raw canonical coefficients
lda.structure(scores,envdata.tran)
#interpret: the most important variables that discriminate the groups along LD1 are elevation and road density
#LDA: biplot
plot(scores,type='n')
text(scores,labels=sitegroup[,-2])